from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly
# Switch Plotly into offline notebook mode before any figure is displayed.
plotly.offline.init_notebook_mode()
from sklearn import datasets
# Load the diabetes dataset a single time and reuse the result for the
# feature matrix, the target vector and the column names (previously the
# loader was invoked twice, the second time only to read `feature_names`).
diabetes = datasets.load_diabetes()
diabetes_X, diabetes_y = diabetes.data, diabetes.target

# Tabular view of the features, one named column per feature.
df_diabetes = pd.DataFrame(data=diabetes_X, columns=diabetes.feature_names)
print(df_diabetes)
age sex bmi bp s1 s2 s3
0 0.038076 0.050680 0.061696 0.021872 -0.044223 -0.034821 -0.043401 \
1 -0.001882 -0.044642 -0.051474 -0.026328 -0.008449 -0.019163 0.074412
2 0.085299 0.050680 0.044451 -0.005670 -0.045599 -0.034194 -0.032356
3 -0.089063 -0.044642 -0.011595 -0.036656 0.012191 0.024991 -0.036038
4 0.005383 -0.044642 -0.036385 0.021872 0.003935 0.015596 0.008142
.. ... ... ... ... ... ... ...
437 0.041708 0.050680 0.019662 0.059744 -0.005697 -0.002566 -0.028674
438 -0.005515 0.050680 -0.015906 -0.067642 0.049341 0.079165 -0.028674
439 0.041708 0.050680 -0.015906 0.017293 -0.037344 -0.013840 -0.024993
440 -0.045472 -0.044642 0.039062 0.001215 0.016318 0.015283 -0.028674
441 -0.045472 -0.044642 -0.073030 -0.081413 0.083740 0.027809 0.173816
s4 s5 s6
0 -0.002592 0.019907 -0.017646
1 -0.039493 -0.068332 -0.092204
2 -0.002592 0.002861 -0.025930
3 0.034309 0.022688 -0.009362
4 -0.002592 -0.031988 -0.046641
.. ... ... ...
437 -0.002592 0.031193 0.007207
438 0.034309 -0.018114 0.044485
439 -0.011080 -0.046883 0.015491
440 0.026560 0.044529 -0.025930
441 -0.039493 -0.004222 0.003064
[442 rows x 10 columns]
# --- Univariate regression: BMI -> disease progression ----------------------
# Column 2 of the feature matrix ('bmi' in the printed frame) is the only
# predictor used.
X = diabetes_X[:, 2]

# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    diabetes_y,
    test_size=0.2,
    random_state=42,
)

# The estimator is fed single-column 2-D matrices, so each 1-D split is
# reshaped to (n_samples, 1). The 1-D originals are kept for plotting.
X_re = X_train.reshape(-1, 1)
X_re2 = X_test.reshape(-1, 1)

# Fit on the training split only.
model = LinearRegression().fit(X_re, y_train)

# Predictions on both splits: y_pred2 (train) and y_pred (test).
y_pred2 = model.predict(X_re)
y_pred = model.predict(X_re2)
# Seaborn rendering: both splits as scatter points, with the fitted line
# evaluated on the test inputs drawn on top.
for xs, ys, series_label, series_color in (
    (X_train, y_train, 'Training Data', 'Blue'),
    (X_test, y_test, 'Test Data', 'Red'),
):
    sns.scatterplot(x=xs, y=ys, label=series_label, color=series_color)

sns.lineplot(x=X_test, y=y_pred, label='Linear Regression Model', color='Green')

# Axis text and legend are applied to the current axes, then the figure is shown.
plt.title('BMI vs Disease Progression')
plt.xlabel('BMI')
plt.ylabel('Disease Progression')
plt.legend()
plt.show()
# Plain-matplotlib rendering of the same chart: training and test samples as
# scatter points, plus the model's predictions on the test inputs as a line.
plt.scatter(X_train, y_train, color='pink', label='Training Data')
plt.scatter(X_test, y_test, color='purple', label='Test Data')
plt.plot(X_test, y_pred, color='red', label='Linear Regression Model')
plt.xlabel('BMI')
plt.ylabel('Disease Progression')
# Same title as the seaborn and plotly versions of this figure, which this
# variant was previously missing.
plt.title('BMI vs Disease Progression')
plt.legend(loc='lower right')
plt.show()
# Interactive Plotly rendering: two marker traces for the data splits and one
# line trace for the model's predictions on the test inputs.
fig = go.Figure(
    data=[
        go.Scatter(
            x=X_train,
            y=y_train,
            mode='markers',
            name='Training Data',
            marker=dict(color='gold'),
        ),
        go.Scatter(
            x=X_test,
            y=y_test,
            mode='markers',
            name='Test Data',
            marker=dict(color='brown'),
        ),
        # The line colour is set directly on the only 'lines' trace.
        go.Scatter(
            x=X_test,
            y=y_pred,
            mode='lines',
            name='Linear Regression Model',
            line=dict(color='grey'),
        ),
    ]
)
fig.update_layout(
    title='BMI vs Disease Progression',
    xaxis_title='BMI',
    yaxis_title='Disease Progression',
)
fig.show()
# Fitted line parameters: a single coefficient (one feature) and the intercept.
slope = model.coef_[0]
intrcpt = model.intercept_

# Mean squared error on the held-out and on the training samples.
mse_test = mean_squared_error(y_test, y_pred)
mse_train = mean_squared_error(y_train, y_pred2)

# Report every figure as "<caption> <value>", one per line.
for caption, value in (
    ("Mean Squared Error of Test data : ", mse_test),
    ("Mean Squared Error of Train data : ", mse_train),
    ("Slope : ", slope),
    ("Intercept : ", intrcpt),
):
    print(caption, value)
Mean Squared Error of Test data : 4061.8259284949268
Mean Squared Error of Train data : 3854.11265207582
Slope : 998.5776891375593
Intercept : 152.00335421448167